#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time   :  2019/7/4 10:50
@Author :  geqh
@file   :  tag_EN.py
"""
import jieba.posseg as pseg
from utils.log import *
from utils.confPaser import *
import os


def load_dict():
    """Load the lookup table stored in ``conf/dic_cn.txt`` under ``project_path``.

    Each usable line is split on ``-``: the first segment is the value and
    the second segment is a ``;``-separated list of keys that all map to
    that value (i.e. ``<value>-<key>;<key>;...``).

    Blank lines and lines without a ``-`` separator are skipped rather than
    raising ``IndexError``. Surrounding whitespace is removed from the value
    and from every key, and empty keys (for example from a trailing ``;``)
    are ignored.

    Returns:
        dict: mapping of every key found in the file to the value of the
        line it appeared on. When a key occurs more than once, the last
        occurrence wins.
    """
    dic_path = os.path.join(project_path, "conf/dic_cn.txt")
    mapping = {}
    with open(dic_path, "r", encoding="utf8") as f:
        # Iterate the file lazily; there is no need to hold every line at once.
        for line in f:
            parts = line.strip().split("-")
            if len(parts) < 2:
                # Blank or malformed line: nothing to map, so skip it.
                continue
            value = parts[0].strip()
            for item in parts[1].split(";"):
                key = item.strip()
                if key:
                    mapping[key] = value
    logger.info("load_dict:" + dic_path)
    return mapping


def tagCN(text, dict_map):
    """Segment *text* with ``jieba.posseg`` and label every token.

    Args:
        text: the string passed to ``pseg.cut``.
        dict_map: mapping from an upper-cased segmenter flag to the label
            stored under ``"pos"``.

    Returns:
        list: one dict per token with the keys ``"word"`` (the token text),
        ``"tag"`` (the upper-cased flag) and ``"pos"`` (the mapped label,
        or ``"X"`` when the flag is missing from *dict_map* or maps to a
        falsy value). The list is also written to the log at info level.
    """
    result = []
    for token in pseg.cut(text):
        flag = str(token.flag).upper()
        # A missing or falsy entry falls back to the catch-all label "X".
        label = dict_map.get(flag) or "X"
        result.append({"word": token.word, "tag": flag, "pos": label})
    logger.info(result)
    return result
